Zhiang Chen, March 2017
This notebook builds the training, validation and test datasets. First, it reads the 24 pickle files, which contain data captured at three different desk heights: files 1-8 are from height_1 of the desk, files 9-16 from height_2, and files 17-24 from height_3. Two files are randomly chosen from each group of eight to form the held-out pool; of those six files, three are randomly assigned to the validation dataset and the remaining three to the test dataset.
In [1]:
from six.moves import cPickle as pickle
import matplotlib.pyplot as plt
import os
from random import sample, shuffle
import numpy as np
In [2]:
# Read every pickle file under 'pickle/' and collect each file's image
# dictionary into one dataset keyed by file name.
# NOTE(review): pickle.load runs arbitrary code — fine for these local,
# self-produced files, but never use it on untrusted input.
files = os.listdir('pickle')
dataset = dict()
for file_name in files:
    with open('pickle/' + file_name, 'rb') as f:
        contents = pickle.load(f)
    dataset.setdefault(file_name, contents['image'])
    del contents
In [3]:
# Hold out two files from each desk height: 1-8 (height_1), 9-16 (height_2),
# 17-24 (height_3).  The original sampled the third group from 16-24, which
# overlaps height_2 (file 16) and could even pick file 16 twice, letting the
# validation and test sets share a file; the ranges are now disjoint.
# range() replaces the Python-2-only xrange().
v_t = sample(range(1, 9), 2) + sample(range(9, 17), 2) + sample(range(17, 25), 2)
shuffle(v_t)
valid = v_t[:3]   # three held-out files -> validation
test = v_t[3:]    # remaining three held-out files -> test
train = list(set(range(1, 25)) - set(v_t))
def get_names(ls):
    """Map a list of file indices to pickle-key names, e.g. 3 -> 'p3'."""
    return ['p{}'.format(index) for index in ls]
# Convert the index lists to pickle-key names and report the split.
train = get_names(train)
valid = get_names(valid)
test = get_names(test)
for split_label, split in (('train', train), ('valid', valid), ('test', test)):
    print(split_label, split)
def add_dic(x, y):
    """Merge two dicts into a new dict; on duplicate keys, y wins.

    The original `dict(x.items() + y.items())` only works in Python 2
    (dict_items objects do not support `+` in Python 3); copy-then-update
    has the same semantics in both.
    """
    merged = x.copy()
    merged.update(y)
    return merged
def get_data(name_list):
    """Merge the per-file image dicts of the named pickle files into one dict.

    Rewritten without the bare `reduce`, which is Python-2-only (py3 needs
    functools.reduce) and raised TypeError on an empty name_list; this
    version returns {} for an empty list.
    """
    merged = {}
    for name in name_list:
        # Keeps the original `.get(name, False)` fallback: a missing key
        # still fails loudly because update(False) raises TypeError.
        merged.update(dataset.get(name, False))
    return merged
# Each merged dictionary maps image name -> numpy array; for example, one
# of the names is '30-8-1-gball-288.png'.
train_dataset, valid_dataset, test_dataset = (
    get_data(train),
    get_data(valid),
    get_data(test),
)
In [4]:
# Object classes that are labeled with orientation 0 regardless of file name.
non_orientations = ['empty', 'cup', 'tball', 'pball', 'gball']
image_size = 50  # images are image_size x image_size, single channel

def label_data(data):
    """Split a {image_name: array} dict into parallel label/value lists.

    Image names look like '30-8-1-gball-288.png': the second-to-last
    dash-separated field is the object class and the last one is the
    orientation in degrees.

    Returns (objects, orientations, values) where values are float32
    arrays of shape (image_size, image_size, 1).
    """
    objects = list()
    orientations = list()
    values = list()
    # .items() replaces the Python-2-only .iteritems()
    for name, value in data.items():
        fields = name.split('.')[0].split('-')  # parse once instead of three times
        obj = fields[-2]  # object class name
        ori = fields[-1]  # orientation in degrees
        objects.append(obj)
        if obj in non_orientations:
            orientations.append(0)
        elif obj == 'gstick' and fields[2] in ['1', '3']:
            # presumably these gstick poses carry no distinct orientation;
            # the original labels them 0 — confirm against the data spec
            orientations.append(0)
        else:
            orientations.append(int(ori))
        values.append(value.reshape(image_size, image_size, 1).astype(np.float32))
    return objects, orientations, values
train_objects, train_orientations, train_values = label_data(train_dataset)
valid_objects, valid_orientations, valid_values = label_data(valid_dataset)
test_objects, test_orientations, test_values = label_data(test_dataset)
In [5]:
# Class name <-> integer label lookup tables.
object2value = {'empty':0,'duck':1,'cup':2,'sponge':3,'tball':4,'pball':5,'gball':6,'gstick':7,'nerf':8,'calc':9,'stapler':10}
value2object = dict((value,name) for name,value in object2value.items())
# All possible orientation labels: multiples of 18 degrees in [0, 360).
orientations = [18*x for x in range(20)]

def convert_objects(objects):
    """One-hot encode class names -> (n, 11) float32 array."""
    obj_values = np.asarray([object2value[obj] for obj in objects])
    return (np.arange(len(object2value)) == obj_values[:, None]).astype(np.float32)

def convert_orientations(orientations):
    """One-hot encode orientations (degrees) -> (n, 10) float32 array.

    Angle theta and theta+180 share a bucket: index = theta//18 % 10.
    Floor division (//) replaces /, which under Python 3 is true division
    and would produce float indices.
    """
    ori_values = np.asarray(orientations) // 18 % 10
    return (np.arange(10) == ori_values[:, None]).astype(np.float32)
# One-hot labels and float32 image tensors, grouped per split.
train_objects_ = convert_objects(train_objects)
train_orientations_ = convert_orientations(train_orientations)
train_values_ = np.asarray(train_values).astype(np.float32)

valid_objects_ = convert_objects(valid_objects)
valid_orientations_ = convert_orientations(valid_orientations)
valid_values_ = np.asarray(valid_values).astype(np.float32)

test_objects_ = convert_objects(test_objects)
test_orientations_ = convert_orientations(test_orientations)
test_values_ = np.asarray(test_values).astype(np.float32)
In [ ]:
# Bundle all splits plus the label lookup tables and pickle them to disk.
data_file = 'depth_data'
save = {
    'train_orientations': train_orientations_,
    'valid_orientations': valid_orientations_,
    'test_orientations': test_orientations_,
    'train_objects': train_objects_,
    'valid_objects': valid_objects_,
    'test_objects': test_objects_,
    'train_values': train_values_,
    'valid_values': valid_values_,
    'test_values': test_values_,
    'object2value': object2value,
    'value2object': value2object,
}
with open(data_file, 'wb') as f:
    # HIGHEST_PROTOCOL matches the 'small_data' dump later in this notebook
    # and is far more compact for large numpy arrays.  The explicit
    # f.close() is gone: the with-block already closes the file.
    pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
statinfo = os.stat(data_file)
file_size = float(statinfo.st_size) / 1000
print('Compressed data size: %0.1fkB' % file_size)
In [9]:
def randomize(dataset, classes, angles):
    """Shuffle images, class labels and orientation labels with one
    shared random permutation so the three stay aligned row-for-row."""
    order = np.random.permutation(len(classes))
    return dataset[order], classes[order], angles[order]
# Shuffle the training split.  Note: train_dataset is rebound here from the
# earlier {name: array} dict to a shuffled 4-D array.
train_dataset, train_classes, train_angles = randomize(
    train_values_, train_objects_, train_orientations_)

# Save a 100-image sample for quick experiments.
small_data = train_dataset[0:100, :, :, :]
with open('small_data', 'wb') as f:
    save = {
        'small_data': small_data,
    }
    # the with-block closes the file; the redundant f.close() was removed
    pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)

## test: stacking one reshaped image onto the sample keeps the 4-D layout
image = train_dataset[11, :, :, :].reshape(-1, image_size, image_size, 1)
image = np.append(image, small_data, axis=0)
print(image.shape)